﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace HTMLDoc
{
    /*
     * This class holds an HTML page as a string. The page is downloaded from the web using the URL
     * that is passed as the parameter to the constructor.
     * The URL of the page is stored in an instance variable called url, and all the links that are
     * found in the page are stored as a List<string> in this class.
     */
      
    /// <summary>
    /// Downloads the document behind a URL as a string and collects the href targets found in it.
    /// </summary>
    public class HtmlDoc
    {
        // Matches href=" followed by a run of URL characters. The closing quote is not part of
        // the match, and characters outside the class (for example '?' or '=') end the match.
        private const string pattern = "href=\"[a-zA-Z0-9./:&\\d_-]+"; // links regex 
        private List<string> links;
        private string htmlDocument;
        private string url;

        /// <summary>
        /// Stores the URL, downloads the document it points to and extracts the links from it.
        /// </summary>
        /// <param name="url">Address of the document to download.</param>
        public HtmlDoc(string url)
        {
            this.url = url;
            // Start from an empty document so link extraction still works when the download fails.
            htmlDocument = "";
            GetHMTLDoc(url);
            links = GetLinks(htmlDocument);
        }

        /// <summary>
        /// The downloaded document text; stays an empty string when the download failed.
        /// </summary>
        public string HtmlDocument
        {
            get { return htmlDocument; }
            set { htmlDocument = value; }
        }

        /// <summary>
        /// The link targets extracted from the document during construction.
        /// </summary>
        public List<string> Links
        {
            get { return links; }
            set { links = value; }
        }

        /// <summary>
        /// The URL that was passed to the constructor.
        /// </summary>
        public string Url
        {
            get { return url; }
            set { url = value; }
        }

        /// <summary>
        /// Downloads the document at <paramref name="url"/> into the htmlDocument field.
        /// A <see cref="WebException"/> is reported on the console and leaves the field unchanged;
        /// any other exception propagates to the caller.
        /// </summary>
        /// <param name="url">Address of the document to download.</param>
        private void GetHMTLDoc(string url)
        {
            try
            {
                // WebClient is IDisposable; the using block releases it even if the download throws.
                using (WebClient client = new WebClient())
                {
                    htmlDocument = client.DownloadString(url);
                }
            }
            catch (WebException e)
            {
                // Deliberate best effort: report the failure and continue with the current document.
                Console.WriteLine("EXCEPTION!!!!: " + e.Message);
            }
        }

        /// <summary>
        /// Extracts every href target that the link pattern matches in the given document.
        /// </summary>
        /// <param name="htmlDocument">Document text to scan.</param>
        /// <returns>The matched link targets in document order; empty when nothing matches.</returns>
        private List<string> GetLinks(string htmlDocument)
        {
            // Nothing to scan; also avoids Regex.Matches throwing on a null input.
            if (string.IsNullOrEmpty(htmlDocument))
            {
                return new List<string>();
            }

            // Each match starts with the literal href=" prefix; strip it to keep only the target.
            return Regex.Matches(htmlDocument, pattern)
                        .Cast<Match>()
                        .Select(match => match.Value.Replace("href=\"", ""))
                        .ToList();
        }
    }
}
